#!/home/chunwei/chunenv/bin/python
# -*- coding: utf-8 -*-
import sys
import pandas as pd
import numpy as np
import datetime as d
import argparse
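'''
Analyze the number of recent events for each user.

Generated features:

event_[overall]_count:  total number of events of each type
event_[overall]_count.latest.5: events within the latest 5 days
event_[overall]_count.latest.10: events within the latest 10 days
'''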
def parse_args():
    """Parse the command line and return the arguments as a dict.

    Two positional arguments are expected: ``time_path`` and ``output``.
    When the script is started without any argument, ``-h`` is appended to
    ``sys.argv`` so that argparse prints the usage text and exits.

    Returns:
        dict with the keys ``'time_path'`` and ``'output'``.
    """
    # No arguments supplied: fall back to showing the help message.
    if len(sys.argv) == 1:
        sys.argv.append('-h')

    cli = argparse.ArgumentParser()
    for positional in ('time_path', 'output'):
        cli.add_argument(positional)
    return vars(cli.parse_args())

# --- Load the input table and compute the total count per event type --------
args = parse_args()
time_path = args['time_path']
output = args['output']

course_time = pd.read_csv(time_path)

# strptime layout of the individual timestamps stored in the date columns.
time_format = "%Y-%m-%dT%H:%M:%S"

out_data = pd.DataFrame()
out_data['course_id'] = course_time['course_id']

# NOTE(review): "nagivate" looks like a misspelling of "navigate", but it is
# used to build the input column name below, so it must match the CSV header
# -- confirm against the data before renaming.
events = ["overall", "problem", "video", "access", "wiki", "discussion", "nagivate", "page_close"]
for event in events:
    key = "event_%s_date" % event
    out_key = "event_%s_count" % event
    # Each cell is split on whitespace and the pieces are counted.
    # Bracket indexing is used instead of attribute access so a column name
    # can never be shadowed by a DataFrame attribute or method.
    # na_action='ignore' leaves missing cells as NaN instead of calling the
    # lambda on them.
    out_data[out_key] = course_time[key].map(
        lambda x: len(x.split()), na_action='ignore')

# Single-argument print(...) behaves identically on Python 2 and Python 3.
print(out_data.head())

def get_latest_event_count(rcd, N, fmt=None):
    """Count the events in ``rcd`` that lie within ``N`` days of its latest event.

    Args:
        rcd: string of whitespace-separated timestamps.
        N: window size in days. An event is counted when the whole-day part
            of its distance to the latest event is ``<= N``, so the latest
            event itself is always counted.
        fmt: optional ``strptime`` format of the timestamps. When omitted,
            the module-level ``time_format`` is used.

    Returns:
        The number of counted events; ``0`` when ``rcd`` contains no
        timestamps.

    Raises:
        ValueError: propagated from ``strptime`` when a timestamp does not
            match the format.
    """
    if fmt is None:
        fmt = time_format

    dates = [d.datetime.strptime(stamp, fmt) for stamp in rcd.split()]
    if not dates:
        # Empty / whitespace-only record: nothing to count (previously an
        # IndexError on dates[-1]).
        return 0

    # Use the true maximum rather than the last element so the window is
    # anchored correctly even when the timestamps are not sorted.
    latest_date = max(dates)
    return sum(1 for date in dates if (latest_date - date).days <= N)

# --- Windowed counts: events within the latest 5 and 10 days ----------------
for N in (5, 10):
    for event in events:
        key = "event_%s_date" % event
        out_key = "event_%s_count.latest.%d" % (event, N)
        # N is bound as a default argument so the lambda does not depend on
        # the loop variable's later value. Missing cells stay NaN because of
        # na_action='ignore'.
        out_data[out_key] = course_time[key].map(
            lambda x, n=N: get_latest_event_count(x, n), na_action='ignore')

# Single-argument print(...) behaves identically on Python 2 and Python 3.
print(out_data.head())

# Two spaces on purpose: reproduces the text the former
# `print 'output to ', output` statement emitted.
print('output to  %s' % output)

out_data.to_csv(output, index=False)
